{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Evaluating RMSE, MAE of algorithm KNNBasic.\n",
      "\n",
      "------------\n",
      "Fold 1\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "RMSE: 0.9876\n",
      "MAE:  0.7807\n",
      "------------\n",
      "Fold 2\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "RMSE: 0.9871\n",
      "MAE:  0.7796\n",
      "------------\n",
      "Fold 3\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "RMSE: 0.9902\n",
      "MAE:  0.7818\n",
      "------------\n",
      "------------\n",
      "Mean RMSE: 0.9883\n",
      "Mean MAE : 0.7807\n",
      "------------\n",
      "------------\n",
      "        Fold 1  Fold 2  Fold 3  Mean    \n",
      "MAE     0.7807  0.7796  0.7818  0.7807  \n",
      "RMSE    0.9876  0.9871  0.9902  0.9883  \n"
     ]
    }
   ],
   "source": [
    "from surprise import KNNBasic,SVD\n",
    "from surprise import Dataset\n",
    "from surprise import evaluate, print_perf\n",
    "# http://surprise.readthedocs.io/en/stable/index.html\n",
    "# http://files.grouplens.org/datasets/movielens/ml-100k-README.txt\n",
    "\n",
    "# Load the movielens-100k dataset (download it if needed),\n",
    "# and split it into 3 folds for cross-validation.\n",
    "data = Dataset.load_builtin('ml-100k')\n",
    "data.split(n_folds=3)\n",
    "\n",
    "# We'll use the famous KNNBasic algorithm.\n",
    "algo = KNNBasic()\n",
    "\n",
    "# Evaluate performances of our algorithm on the dataset.\n",
    "perf = evaluate(algo, data, measures=['RMSE', 'MAE'])\n",
    "\n",
    "print_perf(perf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------\n",
      "Parameters combination 1 of 8\n",
      "params:  {'lr_all': 0.002, 'n_epochs': 5, 'reg_all': 0.4}\n",
      "------------\n",
      "Mean RMSE: 0.9972\n",
      "Mean FCP : 0.6843\n",
      "------------\n",
      "------------\n",
      "Parameters combination 2 of 8\n",
      "params:  {'lr_all': 0.005, 'n_epochs': 5, 'reg_all': 0.4}\n",
      "------------\n",
      "Mean RMSE: 0.9734\n",
      "Mean FCP : 0.6946\n",
      "------------\n",
      "------------\n",
      "Parameters combination 3 of 8\n",
      "params:  {'lr_all': 0.002, 'n_epochs': 10, 'reg_all': 0.4}\n",
      "------------\n",
      "Mean RMSE: 0.9777\n",
      "Mean FCP : 0.6926\n",
      "------------\n",
      "------------\n",
      "Parameters combination 4 of 8\n",
      "params:  {'lr_all': 0.005, 'n_epochs': 10, 'reg_all': 0.4}\n",
      "------------\n",
      "Mean RMSE: 0.9635\n",
      "Mean FCP : 0.6987\n",
      "------------\n",
      "------------\n",
      "Parameters combination 5 of 8\n",
      "params:  {'lr_all': 0.002, 'n_epochs': 5, 'reg_all': 0.6}\n",
      "------------\n",
      "Mean RMSE: 1.0029\n",
      "Mean FCP : 0.6875\n",
      "------------\n",
      "------------\n",
      "Parameters combination 6 of 8\n",
      "params:  {'lr_all': 0.005, 'n_epochs': 5, 'reg_all': 0.6}\n",
      "------------\n",
      "Mean RMSE: 0.9820\n",
      "Mean FCP : 0.6953\n",
      "------------\n",
      "------------\n",
      "Parameters combination 7 of 8\n",
      "params:  {'lr_all': 0.002, 'n_epochs': 10, 'reg_all': 0.6}\n",
      "------------\n",
      "Mean RMSE: 0.9860\n",
      "Mean FCP : 0.6943\n",
      "------------\n",
      "------------\n",
      "Parameters combination 8 of 8\n",
      "params:  {'lr_all': 0.005, 'n_epochs': 10, 'reg_all': 0.6}\n",
      "------------\n",
      "Mean RMSE: 0.9733\n",
      "Mean FCP : 0.6991\n",
      "------------\n"
     ]
    }
   ],
   "source": [
    "from surprise import GridSearch\n",
    "\n",
    "param_grid = {'n_epochs': [5, 10], 'lr_all': [0.002, 0.005],\n",
    "              'reg_all': [0.4, 0.6]}\n",
    "grid_search = GridSearch(SVD, param_grid, measures=['RMSE', 'FCP'])\n",
    "data = Dataset.load_builtin('ml-100k')\n",
    "data.split(n_folds=3)\n",
    "\n",
    "grid_search.evaluate(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.963501988854\n",
      "{'lr_all': 0.005, 'n_epochs': 10, 'reg_all': 0.4}\n",
      "0.699084153002\n",
      "{'lr_all': 0.005, 'n_epochs': 10, 'reg_all': 0.6}\n"
     ]
    }
   ],
   "source": [
    "# best RMSE score\n",
    "print(grid_search.best_score['RMSE'])\n",
    "\n",
    "# combination of parameters that gave the best RMSE score\n",
    "print(grid_search.best_params['RMSE'])\n",
    "\n",
    "\n",
    "# best FCP score\n",
    "print(grid_search.best_score['FCP'])\n",
    "\n",
    "\n",
    "# combination of parameters that gave the best FCP score\n",
    "print(grid_search.best_params['FCP'])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>FCP</th>\n",
       "      <th>RMSE</th>\n",
       "      <th>lr_all</th>\n",
       "      <th>n_epochs</th>\n",
       "      <th>params</th>\n",
       "      <th>scores</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.684266</td>\n",
       "      <td>0.997160</td>\n",
       "      <td>0.002</td>\n",
       "      <td>5</td>\n",
       "      <td>{'lr_all': 0.002, 'n_epochs': 5, 'reg_all': 0.4}</td>\n",
       "      <td>{'RMSE': 0.997160189649, 'FCP': 0.684266412476}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.694552</td>\n",
       "      <td>0.973383</td>\n",
       "      <td>0.005</td>\n",
       "      <td>5</td>\n",
       "      <td>{'lr_all': 0.005, 'n_epochs': 5, 'reg_all': 0.4}</td>\n",
       "      <td>{'RMSE': 0.973383132387, 'FCP': 0.694551932996}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.692616</td>\n",
       "      <td>0.977697</td>\n",
       "      <td>0.002</td>\n",
       "      <td>10</td>\n",
       "      <td>{'lr_all': 0.002, 'n_epochs': 10, 'reg_all': 0.4}</td>\n",
       "      <td>{'RMSE': 0.977696629511, 'FCP': 0.692615513155}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.698722</td>\n",
       "      <td>0.963502</td>\n",
       "      <td>0.005</td>\n",
       "      <td>10</td>\n",
       "      <td>{'lr_all': 0.005, 'n_epochs': 10, 'reg_all': 0.4}</td>\n",
       "      <td>{'RMSE': 0.963501988854, 'FCP': 0.698721750945}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.687482</td>\n",
       "      <td>1.002855</td>\n",
       "      <td>0.002</td>\n",
       "      <td>5</td>\n",
       "      <td>{'lr_all': 0.002, 'n_epochs': 5, 'reg_all': 0.6}</td>\n",
       "      <td>{'RMSE': 1.00285516237, 'FCP': 0.687481665759}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.695337</td>\n",
       "      <td>0.982047</td>\n",
       "      <td>0.005</td>\n",
       "      <td>5</td>\n",
       "      <td>{'lr_all': 0.005, 'n_epochs': 5, 'reg_all': 0.6}</td>\n",
       "      <td>{'RMSE': 0.98204676013, 'FCP': 0.695337489535}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0.694338</td>\n",
       "      <td>0.985981</td>\n",
       "      <td>0.002</td>\n",
       "      <td>10</td>\n",
       "      <td>{'lr_all': 0.002, 'n_epochs': 10, 'reg_all': 0.6}</td>\n",
       "      <td>{'RMSE': 0.985980855401, 'FCP': 0.694337564062}</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0.699084</td>\n",
       "      <td>0.973282</td>\n",
       "      <td>0.005</td>\n",
       "      <td>10</td>\n",
       "      <td>{'lr_all': 0.005, 'n_epochs': 10, 'reg_all': 0.6}</td>\n",
       "      <td>{'RMSE': 0.973281870802, 'FCP': 0.699084153002}</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        FCP      RMSE  lr_all  n_epochs  \\\n",
       "0  0.684266  0.997160   0.002         5   \n",
       "1  0.694552  0.973383   0.005         5   \n",
       "2  0.692616  0.977697   0.002        10   \n",
       "3  0.698722  0.963502   0.005        10   \n",
       "4  0.687482  1.002855   0.002         5   \n",
       "5  0.695337  0.982047   0.005         5   \n",
       "6  0.694338  0.985981   0.002        10   \n",
       "7  0.699084  0.973282   0.005        10   \n",
       "\n",
       "                                              params  \\\n",
       "0   {'lr_all': 0.002, 'n_epochs': 5, 'reg_all': 0.4}   \n",
       "1   {'lr_all': 0.005, 'n_epochs': 5, 'reg_all': 0.4}   \n",
       "2  {'lr_all': 0.002, 'n_epochs': 10, 'reg_all': 0.4}   \n",
       "3  {'lr_all': 0.005, 'n_epochs': 10, 'reg_all': 0.4}   \n",
       "4   {'lr_all': 0.002, 'n_epochs': 5, 'reg_all': 0.6}   \n",
       "5   {'lr_all': 0.005, 'n_epochs': 5, 'reg_all': 0.6}   \n",
       "6  {'lr_all': 0.002, 'n_epochs': 10, 'reg_all': 0.6}   \n",
       "7  {'lr_all': 0.005, 'n_epochs': 10, 'reg_all': 0.6}   \n",
       "\n",
       "                                            scores  \n",
       "0  {'RMSE': 0.997160189649, 'FCP': 0.684266412476}  \n",
       "1  {'RMSE': 0.973383132387, 'FCP': 0.694551932996}  \n",
       "2  {'RMSE': 0.977696629511, 'FCP': 0.692615513155}  \n",
       "3  {'RMSE': 0.963501988854, 'FCP': 0.698721750945}  \n",
       "4   {'RMSE': 1.00285516237, 'FCP': 0.687481665759}  \n",
       "5   {'RMSE': 0.98204676013, 'FCP': 0.695337489535}  \n",
       "6  {'RMSE': 0.985980855401, 'FCP': 0.694337564062}  \n",
       "7  {'RMSE': 0.973281870802, 'FCP': 0.699084153002}  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd  \n",
    "\n",
    "results_df = pd.DataFrame.from_dict(grid_search.cv_results)\n",
    "results_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Estimating biases using als...\n",
      "Computing the pearson_baseline similarity matrix...\n",
      "Done computing similarity matrix.\n"
     ]
    }
   ],
   "source": [
    "from __future__ import (absolute_import, division, print_function,\n",
    "                        unicode_literals)\n",
    "import os\n",
    "import io\n",
    "\n",
    "from surprise import KNNBaseline\n",
    "from surprise import Dataset\n",
    "\n",
    "\n",
    "def read_item_names():\n",
    "\n",
    "\n",
    "    file_name = ('./ml-100k/u.item')\n",
    "    rid_to_name = {}\n",
    "    name_to_rid = {}\n",
    "    with io.open(file_name, 'r', encoding='ISO-8859-1') as f:\n",
    "        for line in f:\n",
    "            line = line.split('|')\n",
    "            rid_to_name[line[0]] = line[1]\n",
    "            name_to_rid[line[1]] = line[0]\n",
    "\n",
    "    return rid_to_name, name_to_rid\n",
    "\n",
    "\n",
    "\n",
    "data = Dataset.load_builtin('ml-100k')\n",
    "trainset = data.build_full_trainset()\n",
    "sim_options = {'name': 'pearson_baseline', 'user_based': False}\n",
    "algo = KNNBaseline(sim_options=sim_options)\n",
    "algo.train(trainset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1053'"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rid_to_name, name_to_rid = read_item_names()\n",
    "\n",
    "toy_story_raw_id = name_to_rid['Now and Then (1995)']\n",
    "toy_story_raw_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "961"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "toy_story_inner_id = algo.trainset.to_inner_iid(toy_story_raw_id)\n",
    "toy_story_inner_id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[291, 82, 366, 528, 179, 101, 556, 310, 431, 543]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "toy_story_neighbors = algo.get_neighbors(toy_story_inner_id, k=10)\n",
    "toy_story_neighbors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "The 10 nearest neighbors of Toy Story are:\n",
      "While You Were Sleeping (1995)\n",
      "Batman (1989)\n",
      "Dave (1993)\n",
      "Mrs. Doubtfire (1993)\n",
      "Groundhog Day (1993)\n",
      "Raiders of the Lost Ark (1981)\n",
      "Maverick (1994)\n",
      "French Kiss (1995)\n",
      "Stand by Me (1986)\n",
      "Net, The (1995)\n"
     ]
    }
   ],
   "source": [
    "toy_story_neighbors = (algo.trainset.to_raw_iid(inner_id)\n",
    "                       for inner_id in toy_story_neighbors)\n",
    "toy_story_neighbors = (rid_to_name[rid]\n",
    "                       for rid in toy_story_neighbors)\n",
    "\n",
    "print()\n",
    "print('The 10 nearest neighbors of Toy Story are:')\n",
    "for movie in toy_story_neighbors:\n",
    "    print(movie)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
